import pandas as pd
import numpy as np
import math
import seaborn as sns
import plotly.express as px
import matplotlib.pyplot as plt
import plotly.io as pio
import plotly.graph_objs as go

from matplotlib import style
from plotly.offline import init_notebook_mode, iplot
from plotly.subplots import make_subplots
from scipy import stats as st
from scipy.stats import levene
from statsmodels.graphics.gofplots import qqplot

(16715, 11)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 16715 entries, 0 to 16714
Data columns (total 11 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   Name             16713 non-null  object 
 1   Platform         16715 non-null  object 
 2   Year_of_Release  16446 non-null  float64
 3   Genre            16713 non-null  object 
 4   NA_sales         16715 non-null  float64
 5   EU_sales         16715 non-null  float64
 6   JP_sales         16715 non-null  float64
 7   Other_sales      16715 non-null  float64
 8   Critic_Score     8137 non-null   float64
 9   User_Score       10014 non-null  object 
 10  Rating           9949 non-null   object 
dtypes: float64(6), object(5)
memory usage: 1.4+ MB

Name                  2
Platform              0
Year_of_Release     269
Genre                 2
NA_sales              0
EU_sales              0
JP_sales              0
Other_sales           0
Critic_Score       8578
User_Score         6701
Rating             6766
dtype: int64

0

array(['8', nan, '8.3', '8.5', '6.6', '8.4', '8.6', '7.7', '6.3', '7.4',
       '8.2', '9', '7.9', '8.1', '8.7', '7.1', '3.4', '5.3', '4.8', '3.2',
       '8.9', '6.4', '7.8', '7.5', '2.6', '7.2', '9.2', '7', '7.3', '4.3',
       '7.6', '5.7', '5', '9.1', '6.5', 'tbd', '8.8', '6.9', '9.4', '6.8',
       '6.1', '6.7', '5.4', '4', '4.9', '4.5', '9.3', '6.2', '4.2', '6',
       '3.7', '4.1', '5.8', '5.6', '5.5', '4.4', '4.6', '5.9', '3.9',
       '3.1', '2.9', '5.2', '3.3', '4.7', '5.1', '3.5', '2.5', '1.9', '3',
       '2.7', '2.2', '2', '9.5', '2.1', '3.6', '2.8', '1.8', '3.8', '0',
       '1.6', '9.6', '2.4', '1.7', '1.1', '0.3', '1.5', '0.7', '1.2',
       '2.3', '0.5', '1.3', '0.2', '0.6', '1.4', '0.9', '1', '9.7'],
      dtype=object)

(2424, 11)

(0, 11)

Пропущенных значений в колонке year - 146, что составляет - 0.87%

array(['E', nan, 'M', 'T', 'E10+', 'K-A', 'AO', 'EC', 'RP'], dtype=object)

/home/jupyter-zurab/.local/lib/python3.9/site-packages/seaborn/distributions.py:499: FutureWarning:

In a future version, the Index constructor will not infer numeric dtypes when passed object-dtype sequences (matching Series behavior)

/home/jupyter-zurab/.local/lib/python3.9/site-packages/seaborn/distributions.py:500: FutureWarning:

In a future version, the Index constructor will not infer numeric dtypes when passed object-dtype sequences (matching Series behavior)


import pandas as pd
import numpy as np
import math
import seaborn as sns
import plotly.express as px
import matplotlib.pyplot as plt
import plotly.io as pio
import plotly.graph_objs as go

from matplotlib import style
from plotly.offline import init_notebook_mode, iplot
from plotly.subplots import make_subplots
from scipy import stats as st
from scipy.stats import levene
from statsmodels.graphics.gofplots import qqplot


data_game = pd.read_csv('games.csv')


display(data_game.head(), data_game.tail())


data_game.shape

(16715, 11)


data_game.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 16715 entries, 0 to 16714
Data columns (total 11 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   Name             16713 non-null  object 
 1   Platform         16715 non-null  object 
 2   Year_of_Release  16446 non-null  float64
 3   Genre            16713 non-null  object 
 4   NA_sales         16715 non-null  float64
 5   EU_sales         16715 non-null  float64
 6   JP_sales         16715 non-null  float64
 7   Other_sales      16715 non-null  float64
 8   Critic_Score     8137 non-null   float64
 9   User_Score       10014 non-null  object 
 10  Rating           9949 non-null   object 
dtypes: float64(6), object(5)
memory usage: 1.4+ MB


display(data_game.describe(),
        #описания категориальных признаков
        data_game.describe(include=[object]))


data_game.isna().sum()

Name                  2
Platform              0
Year_of_Release     269
Genre                 2
NA_sales              0
EU_sales              0
JP_sales              0
Other_sales           0
Critic_Score       8578
User_Score         6701
Rating             6766
dtype: int64


data_game.duplicated().sum()

0


# Shorten the verbose column names, then normalise every header to lower case.
column_aliases = {
    'Year_of_Release': 'year',
    'NA_sales': 'na',
    'JP_sales': 'jp',
    'EU_sales': 'eu',
    'Other_sales': 'other',
}
data_game = data_game.rename(columns=column_aliases)
data_game.columns = data_game.columns.str.lower()

array(['8', nan, '8.3', '8.5', '6.6', '8.4', '8.6', '7.7', '6.3', '7.4',
       '8.2', '9', '7.9', '8.1', '8.7', '7.1', '3.4', '5.3', '4.8', '3.2',
       '8.9', '6.4', '7.8', '7.5', '2.6', '7.2', '9.2', '7', '7.3', '4.3',
       '7.6', '5.7', '5', '9.1', '6.5', 'tbd', '8.8', '6.9', '9.4', '6.8',
       '6.1', '6.7', '5.4', '4', '4.9', '4.5', '9.3', '6.2', '4.2', '6',
       '3.7', '4.1', '5.8', '5.6', '5.5', '4.4', '4.6', '5.9', '3.9',
       '3.1', '2.9', '5.2', '3.3', '4.7', '5.1', '3.5', '2.5', '1.9', '3',
       '2.7', '2.2', '2', '9.5', '2.1', '3.6', '2.8', '1.8', '3.8', '0',
       '1.6', '9.6', '2.4', '1.7', '1.1', '0.3', '1.5', '0.7', '1.2',
       '2.3', '0.5', '1.3', '0.2', '0.6', '1.4', '0.9', '1', '9.7'],
      dtype=object)

(2424, 11)

(0, 11)

Пропущенных значений в колонке year - 146, что составляет - 0.87%

array(['E', nan, 'M', 'T', 'E10+', 'K-A', 'AO', 'EC', 'RP'], dtype=object)

/home/jupyter-zurab/.local/lib/python3.9/site-packages/seaborn/distributions.py:499: FutureWarning:

In a future version, the Index constructor will not infer numeric dtypes when passed object-dtype sequences (matching Series behavior)

/home/jupyter-zurab/.local/lib/python3.9/site-packages/seaborn/distributions.py:500: FutureWarning:

In a future version, the Index constructor will not infer numeric dtypes when passed object-dtype sequences (matching Series behavior)

Размер выборки Xbox One: 165
Размер выборки PC: 123

p-значение: 0.139237120435483
Не получилось отвергнуть нулевую гипотезу

Размер выборки Action: 298
Размер выборки Sports: 127

p-значение: 9.658176026869598e-15
Отвергаем нулевую гипотезу


# Shorten the verbose column names, then normalise every header to lower case.
column_aliases = {
    'Year_of_Release': 'year',
    'NA_sales': 'na',
    'JP_sales': 'jp',
    'EU_sales': 'eu',
    'Other_sales': 'other',
}
data_game = data_game.rename(columns=column_aliases)
data_game.columns = data_game.columns.str.lower()


data_game.user_score.unique()

array(['8', nan, '8.3', '8.5', '6.6', '8.4', '8.6', '7.7', '6.3', '7.4',
       '8.2', '9', '7.9', '8.1', '8.7', '7.1', '3.4', '5.3', '4.8', '3.2',
       '8.9', '6.4', '7.8', '7.5', '2.6', '7.2', '9.2', '7', '7.3', '4.3',
       '7.6', '5.7', '5', '9.1', '6.5', 'tbd', '8.8', '6.9', '9.4', '6.8',
       '6.1', '6.7', '5.4', '4', '4.9', '4.5', '9.3', '6.2', '4.2', '6',
       '3.7', '4.1', '5.8', '5.6', '5.5', '4.4', '4.6', '5.9', '3.9',
       '3.1', '2.9', '5.2', '3.3', '4.7', '5.1', '3.5', '2.5', '1.9', '3',
       '2.7', '2.2', '2', '9.5', '2.1', '3.6', '2.8', '1.8', '3.8', '0',
       '1.6', '9.6', '2.4', '1.7', '1.1', '0.3', '1.5', '0.7', '1.2',
       '2.3', '0.5', '1.3', '0.2', '0.6', '1.4', '0.9', '1', '9.7'],
      dtype=object)

(2424, 11)

(0, 11)

Пропущенных значений в колонке year - 146, что составляет - 0.87%

array(['E', nan, 'M', 'T', 'E10+', 'K-A', 'AO', 'EC', 'RP'], dtype=object)

/home/jupyter-zurab/.local/lib/python3.9/site-packages/seaborn/distributions.py:499: FutureWarning:

In a future version, the Index constructor will not infer numeric dtypes when passed object-dtype sequences (matching Series behavior)

/home/jupyter-zurab/.local/lib/python3.9/site-packages/seaborn/distributions.py:500: FutureWarning:

In a future version, the Index constructor will not infer numeric dtypes when passed object-dtype sequences (matching Series behavior)

Размер выборки Xbox One: 165
Размер выборки PC: 123

p-значение: 0.139237120435483
Не получилось отвергнуть нулевую гипотезу

Размер выборки Action: 298
Размер выборки Sports: 127

p-значение: 9.658176026869598e-15
Отвергаем нулевую гипотезу


data_game.user_score.unique()

array(['8', nan, '8.3', '8.5', '6.6', '8.4', '8.6', '7.7', '6.3', '7.4',
       '8.2', '9', '7.9', '8.1', '8.7', '7.1', '3.4', '5.3', '4.8', '3.2',
       '8.9', '6.4', '7.8', '7.5', '2.6', '7.2', '9.2', '7', '7.3', '4.3',
       '7.6', '5.7', '5', '9.1', '6.5', 'tbd', '8.8', '6.9', '9.4', '6.8',
       '6.1', '6.7', '5.4', '4', '4.9', '4.5', '9.3', '6.2', '4.2', '6',
       '3.7', '4.1', '5.8', '5.6', '5.5', '4.4', '4.6', '5.9', '3.9',
       '3.1', '2.9', '5.2', '3.3', '4.7', '5.1', '3.5', '2.5', '1.9', '3',
       '2.7', '2.2', '2', '9.5', '2.1', '3.6', '2.8', '1.8', '3.8', '0',
       '1.6', '9.6', '2.4', '1.7', '1.1', '0.3', '1.5', '0.7', '1.2',
       '2.3', '0.5', '1.3', '0.2', '0.6', '1.4', '0.9', '1', '9.7'],
      dtype=object)


# "tbd" stands for "to be determined", i.e. the user score has not been set yet.
# Count the rows that carry this placeholder.
data_game.query('user_score == "tbd"').shape

(2424, 11)


# Treat the "tbd" placeholder as missing so the column can be stored as float.
scores_without_tbd = data_game['user_score'].replace('tbd', np.nan)
data_game['user_score'] = scores_without_tbd.astype(float)
# Sanity check: count the rows that still compare equal to "tbd".
data_game.query('user_score == "tbd"').shape

(0, 11)

Пропущенных значений в колонке year - 146, что составляет - 0.87%

array(['E', nan, 'M', 'T', 'E10+', 'K-A', 'AO', 'EC', 'RP'], dtype=object)

/home/jupyter-zurab/.local/lib/python3.9/site-packages/seaborn/distributions.py:499: FutureWarning:

In a future version, the Index constructor will not infer numeric dtypes when passed object-dtype sequences (matching Series behavior)

/home/jupyter-zurab/.local/lib/python3.9/site-packages/seaborn/distributions.py:500: FutureWarning:

In a future version, the Index constructor will not infer numeric dtypes when passed object-dtype sequences (matching Series behavior)

Размер выборки Xbox One: 165
Размер выборки PC: 123

p-значение: 0.139237120435483
Не получилось отвергнуть нулевую гипотезу

Размер выборки Action: 298
Размер выборки Sports: 127

p-значение: 9.658176026869598e-15
Отвергаем нулевую гипотезу


# Treat the "tbd" placeholder as missing so the column can be stored as float.
scores_without_tbd = data_game['user_score'].replace('tbd', np.nan)
data_game['user_score'] = scores_without_tbd.astype(float)
# Sanity check: count the rows that still compare equal to "tbd".
data_game.query('user_score == "tbd"').shape

(0, 11)


# Store the release year and the critic score with the nullable integer dtype.
for int_column in ('year', 'critic_score'):
    data_game[int_column] = data_game[int_column].astype('Int64')


# Work through the columns in order: [name] has only two gaps, inspect them.
# NaN never equals itself, so `name != name` selects exactly the missing names.
data_game.query('name != name')


data_game.query('year.isnull()')['name'].reset_index().head(3)


# Show every entry of three sample titles; missing values are highlighted in red.
sample_titles = ("Madden NFL 2004", "FIFA Soccer 2004", "LEGO Batman: The Videogame")
display(*(data_game.query(f'name == "{sample_title}"').style.highlight_null('red')
          for sample_title in sample_titles))


def fill_year(data_game, year, name):
    """Fill missing release years from other entries of the same title.

    For every row whose ``year`` is missing, the latest known ``year`` among
    rows sharing the same ``name`` is used. Rows whose title has no known year
    at all, and rows without a name, are left missing.

    Parameters
    ----------
    data_game : pandas.DataFrame
        Frame to update; it is modified in place.
    year : str
        Name of the column holding the release year.
    name : str
        Name of the column holding the game title.

    Returns
    -------
    pandas.DataFrame
        The same ``data_game`` object, for convenience.
    """
    # One grouped aggregation replaces a full-frame scan per unique title.
    latest_year = data_game.groupby(name)[year].max()
    # Titles absent from the lookup (including missing names) map to NaN,
    # so those rows keep their gap.
    data_game[year] = data_game[year].fillna(data_game[name].map(latest_year))
    return data_game

# Fill the gaps in place, then report how many release years are still missing.
fill_year(data_game, 'year', 'name')
missing_years = data_game.year.isna().sum()
missing_share = round(data_game.year.isna().mean()*100, 2)
print(f'Пропущенных значений в колонке year - {missing_years}, что составляет - {missing_share}%')

Пропущенных значений в колонке year - 146, что составляет - 0.87%


display(data_game.query('year.isnull()')['name'].reset_index().head(2), data_game.query('name == "Rock Band"'))


# Rows without a name, release year or genre are dropped.
required_columns = ['name', 'year', 'genre']
data_game.dropna(subset=required_columns, inplace=True)


# Replace missing critic and user scores with 0.
# NOTE(review): '0' also appears among the raw user_score values, so after this
# step 0 no longer distinguishes "no score" from a real zero - confirm this is
# acceptable for the analysis that follows.
for score_column in ('critic_score', 'user_score'):
    data_game[score_column] = data_game[score_column].fillna(0)

array(['E', nan, 'M', 'T', 'E10+', 'K-A', 'AO', 'EC', 'RP'], dtype=object)

/home/jupyter-zurab/.local/lib/python3.9/site-packages/seaborn/distributions.py:499: FutureWarning:

In a future version, the Index constructor will not infer numeric dtypes when passed object-dtype sequences (matching Series behavior)

/home/jupyter-zurab/.local/lib/python3.9/site-packages/seaborn/distributions.py:500: FutureWarning:

In a future version, the Index constructor will not infer numeric dtypes when passed object-dtype sequences (matching Series behavior)

Размер выборки Xbox One: 165
Размер выборки PC: 123

p-значение: 0.139237120435483
Не получилось отвергнуть нулевую гипотезу

Размер выборки Action: 298
Размер выборки Sports: 127

p-значение: 9.658176026869598e-15
Отвергаем нулевую гипотезу


# Replace missing critic and user scores with 0.
# NOTE(review): '0' also appears among the raw user_score values, so after this
# step 0 no longer distinguishes "no score" from a real zero - confirm this is
# acceptable for the analysis that follows.
for score_column in ('critic_score', 'user_score'):
    data_game[score_column] = data_game[score_column].fillna(0)


# Просмотрим уникальные значения колонки [rating]
data_game.rating.unique()

array(['E', nan, 'M', 'T', 'E10+', 'K-A', 'AO', 'EC', 'RP'], dtype=object)


# Label games without a rating as 'Indefinite'.
data_game['rating'] = data_game['rating'].fillna('Indefinite')

/home/jupyter-zurab/.local/lib/python3.9/site-packages/seaborn/distributions.py:499: FutureWarning:

In a future version, the Index constructor will not infer numeric dtypes when passed object-dtype sequences (matching Series behavior)

/home/jupyter-zurab/.local/lib/python3.9/site-packages/seaborn/distributions.py:500: FutureWarning:

In a future version, the Index constructor will not infer numeric dtypes when passed object-dtype sequences (matching Series behavior)

Размер выборки Xbox One: 165
Размер выборки PC: 123

p-значение: 0.139237120435483
Не получилось отвергнуть нулевую гипотезу

Размер выборки Action: 298
Размер выборки Sports: 127

p-значение: 9.658176026869598e-15
Отвергаем нулевую гипотезу


# Label games without a rating as 'Indefinite'.
data_game['rating'] = data_game['rating'].fillna('Indefinite')


# Store the combined sales of all regions in a separate column.
region_columns = ['na', 'eu', 'jp', 'other']
data_game['total_sales'] = data_game[region_columns].sum(axis=1)


def create_any_bar(groupby_column, func, y='name'):
    """Draw a bar chart of ``data_game`` aggregated by ``groupby_column``.

    Parameters
    ----------
    groupby_column : str
        Column of the module-level ``data_game`` frame to group by.
    func : {'count', 'sum'}
        'count' draws a vertical bar chart of the per-group counts of ``y``;
        'sum' draws a horizontal bar chart of the per-group sums of ``y``,
        sorted in ascending order.
    y : str, default 'name'
        Column that is counted or summed.

    Raises
    ------
    ValueError
        If ``func`` is neither 'count' nor 'sum'.
    """
    # Matplotlib 3.6 renamed the bundled seaborn styles; use whichever name
    # this installation provides.
    for style_name in ('seaborn-v0_8-ticks', 'seaborn-ticks'):
        if style_name in plt.style.available:
            plt.style.use(style_name)
            break

    grouped = data_game.groupby(groupby_column)[y]
    # The axis label is set on the axes returned by the plot call: pandas opens
    # a new figure when no `ax` is passed, so a label set through pyplot
    # beforehand would end up on a different, empty figure.
    if func == 'count':
        axes = grouped.count().plot(kind='bar', y=y, figsize=(15, 5), ec='black',
                                    title='Количество продаж по годам')
        axes.set_ylabel('Количество в млн. шт.')
    elif func == 'sum':
        axes = grouped.sum().sort_values().plot(kind='barh', y=y, figsize=(15, 10), ec='black',
                                                title='Сумма продаж по платформам')
        axes.set_xlabel('Количество в млн. шт.')
    else:
        raise ValueError(f"func must be 'count' or 'sum', got {func!r}")

/home/jupyter-zurab/.local/lib/python3.9/site-packages/seaborn/distributions.py:499: FutureWarning:

In a future version, the Index constructor will not infer numeric dtypes when passed object-dtype sequences (matching Series behavior)

/home/jupyter-zurab/.local/lib/python3.9/site-packages/seaborn/distributions.py:500: FutureWarning:

In a future version, the Index constructor will not infer numeric dtypes when passed object-dtype sequences (matching Series behavior)

Размер выборки Xbox One: 165
Размер выборки PC: 123

p-значение: 0.139237120435483
Не получилось отвергнуть нулевую гипотезу

Размер выборки Action: 298
Размер выборки Sports: 127

p-значение: 9.658176026869598e-15
Отвергаем нулевую гипотезу


def create_any_bar(groupby_column, func, y='name'):
    """Draw a bar chart of ``data_game`` aggregated by ``groupby_column``.

    Parameters
    ----------
    groupby_column : str
        Column of the module-level ``data_game`` frame to group by.
    func : {'count', 'sum'}
        'count' draws a vertical bar chart of the per-group counts of ``y``;
        'sum' draws a horizontal bar chart of the per-group sums of ``y``,
        sorted in ascending order.
    y : str, default 'name'
        Column that is counted or summed.

    Raises
    ------
    ValueError
        If ``func`` is neither 'count' nor 'sum'.
    """
    # Matplotlib 3.6 renamed the bundled seaborn styles; use whichever name
    # this installation provides.
    for style_name in ('seaborn-v0_8-ticks', 'seaborn-ticks'):
        if style_name in plt.style.available:
            plt.style.use(style_name)
            break

    grouped = data_game.groupby(groupby_column)[y]
    # The axis label is set on the axes returned by the plot call: pandas opens
    # a new figure when no `ax` is passed, so a label set through pyplot
    # beforehand would end up on a different, empty figure.
    if func == 'count':
        axes = grouped.count().plot(kind='bar', y=y, figsize=(15, 5), ec='black',
                                    title='Количество продаж по годам')
        axes.set_ylabel('Количество в млн. шт.')
    elif func == 'sum':
        axes = grouped.sum().sort_values().plot(kind='barh', y=y, figsize=(15, 10), ec='black',
                                                title='Сумма продаж по платформам')
        axes.set_xlabel('Количество в млн. шт.')
    else:
        raise ValueError(f"func must be 'count' or 'sum', got {func!r}")


create_any_bar('year', 'count')


# Aggregate sales per year and look at how they evolve in each macro-region.
region_names = ['na', 'eu', 'jp', 'other']
yearly_aggregation = {region: 'sum' for region in region_names}
df = data_game.groupby('year', as_index=False).agg(yearly_aggregation)
fig = px.line(
    df,
    x='year',
    y=region_names,
    template='plotly_white',
    title='Динамика продаж по макрорегионам',
)
fig.show()


create_any_bar('platform', 'sum', 'total_sales')

/home/jupyter-zurab/.local/lib/python3.9/site-packages/seaborn/distributions.py:499: FutureWarning:

In a future version, the Index constructor will not infer numeric dtypes when passed object-dtype sequences (matching Series behavior)

/home/jupyter-zurab/.local/lib/python3.9/site-packages/seaborn/distributions.py:500: FutureWarning:

In a future version, the Index constructor will not infer numeric dtypes when passed object-dtype sequences (matching Series behavior)

Размер выборки Xbox One: 165
Размер выборки PC: 123

p-значение: 0.139237120435483
Не получилось отвергнуть нулевую гипотезу

Размер выборки Action: 298
Размер выборки Sports: 127

p-значение: 9.658176026869598e-15
Отвергаем нулевую гипотезу


create_any_bar('platform', 'sum', 'total_sales')


# Total sales per platform as a pivot table, best-selling platforms first.
platform_totals = data_game.pivot_table(index=['platform'], values='total_sales', aggfunc='sum')
data_platforms = platform_totals.sort_values(by='total_sales', ascending=False).reset_index()

# Show the 15 leaders with a brown colour gradient.
brown_cmap = sns.light_palette("brown", as_cmap=True)
display(data_platforms.head(15).style.background_gradient(brown_cmap))

/home/jupyter-zurab/.local/lib/python3.9/site-packages/seaborn/distributions.py:499: FutureWarning:

In a future version, the Index constructor will not infer numeric dtypes when passed object-dtype sequences (matching Series behavior)

/home/jupyter-zurab/.local/lib/python3.9/site-packages/seaborn/distributions.py:500: FutureWarning:

In a future version, the Index constructor will not infer numeric dtypes when passed object-dtype sequences (matching Series behavior)

Размер выборки Xbox One: 165
Размер выборки PC: 123

p-значение: 0.139237120435483
Не получилось отвергнуть нулевую гипотезу

Размер выборки Action: 298
Размер выборки Sports: 127

p-значение: 9.658176026869598e-15
Отвергаем нулевую гипотезу


# Total sales per platform as a pivot table, best-selling platforms first.
platform_totals = data_game.pivot_table(index=['platform'], values='total_sales', aggfunc='sum')
data_platforms = platform_totals.sort_values(by='total_sales', ascending=False).reset_index()

# Show the 15 leaders with a brown colour gradient.
brown_cmap = sns.light_palette("brown", as_cmap=True)
display(data_platforms.head(15).style.background_gradient(brown_cmap))


# Analyse the five leading platforms from the start of the 21st century
# (2000-2016); this is when the video game market starts to grow steadily.
data_2000 = data_game[data_game['year'] >= 2000]

# How did sales change per platform? Take the platforms with the highest total
# sales and plot their yearly distribution, to see how long it typically takes
# for new platforms to appear and for old ones to fade.

# Names of the five best-selling platforms, as a list.
platforms = data_2000.groupby('platform')['total_sales'].sum().sort_values(ascending=False)[:5].index.tolist()
data = []

for index in platforms:
    # Aggregate once per platform and reuse the result for both axes.
    yearly_sales = data_2000[data_2000.platform == index].groupby("year")['total_sales'].sum()
    data.append(go.Bar(x=yearly_sales.index, y=yearly_sales, name=index))

layout = {'title': 'Продажи по годам'}
fig = go.Figure(data=data, layout=layout)
fig.layout.template = 'plotly_white'
fig.update_layout(legend_title_text = "Платформа")
fig.update_xaxes(title_text="Год")
fig.update_yaxes(title_text="Уровень продаж")
fig.show()


data_2013 = data_game[data_game['year'] >= 2013]

/home/jupyter-zurab/.local/lib/python3.9/site-packages/seaborn/distributions.py:499: FutureWarning:

In a future version, the Index constructor will not infer numeric dtypes when passed object-dtype sequences (matching Series behavior)

/home/jupyter-zurab/.local/lib/python3.9/site-packages/seaborn/distributions.py:500: FutureWarning:

In a future version, the Index constructor will not infer numeric dtypes when passed object-dtype sequences (matching Series behavior)

Размер выборки Xbox One: 165
Размер выборки PC: 123

p-значение: 0.139237120435483
Не получилось отвергнуть нулевую гипотезу

Размер выборки Action: 298
Размер выборки Sports: 127

p-значение: 9.658176026869598e-15
Отвергаем нулевую гипотезу


data_2013 = data_game[data_game['year'] >= 2013]


# One bar trace of yearly total sales for every platform present since 2013.
data = []

for index in data_2013.platform.unique():
    # Aggregate once per platform and reuse the result for both axes.
    yearly_sales = data_2013[data_2013.platform == index].groupby("year")['total_sales'].sum()
    data.append(go.Bar(x=yearly_sales.index, y=yearly_sales, name=index))

layout = {'title': 'Продажи по годам'}

fig = go.Figure(data=data, layout=layout)
fig.layout.template = 'plotly_white'

fig.update_layout(legend_title_text = "Платформа")
fig.update_xaxes(title_text="Год")
fig.update_yaxes(title_text="Уровень продаж")
fig.show()


# Pick the six leading platforms used for the rest of the analysis and keep
# one total_sales value per (name, platform, year).
six_platform_games = data_2013.query('platform in ("PS4", "XOne", "PC", "WiiU", "3DS", "PSV")')
data_box = six_platform_games.pivot_table(index=['name', 'platform', 'year'], values='total_sales')
data_box = data_box.sort_values(by='platform').reset_index()
data_box.head(10)

/home/jupyter-zurab/.local/lib/python3.9/site-packages/seaborn/distributions.py:499: FutureWarning:

In a future version, the Index constructor will not infer numeric dtypes when passed object-dtype sequences (matching Series behavior)

/home/jupyter-zurab/.local/lib/python3.9/site-packages/seaborn/distributions.py:500: FutureWarning:

In a future version, the Index constructor will not infer numeric dtypes when passed object-dtype sequences (matching Series behavior)

Размер выборки Xbox One: 165
Размер выборки PC: 123

p-значение: 0.139237120435483
Не получилось отвергнуть нулевую гипотезу

Размер выборки Action: 298
Размер выборки Sports: 127

p-значение: 9.658176026869598e-15
Отвергаем нулевую гипотезу


# Pick the six leading platforms used for the rest of the analysis and keep
# one total_sales value per (name, platform, year).
six_platform_games = data_2013.query('platform in ("PS4", "XOne", "PC", "WiiU", "3DS", "PSV")')
data_box = six_platform_games.pivot_table(index=['name', 'platform', 'year'], values='total_sales')
data_box = data_box.sort_values(by='platform').reset_index()
data_box.head(10)


# One box trace of total sales per platform.
data = [
    go.Box(y=data_box.loc[data_box.platform == platform_name, 'total_sales'], name=platform_name)
    for platform_name in data_box.platform.unique()
]

layout = {'title': 'Продажи по платформам'}
fig = go.Figure(data=data, layout=layout)

# Restrict the y axis to [0, 1] so the boxes are readable.
fig.update_layout(yaxis=dict(range=[0,1]))
fig.layout.template = 'plotly_white'

# The rendered box plot is interactive.
iplot(fig, show_link = True)


# PS4 is chosen as the most popular and successful platform.
ps4_columns = ['total_sales', 'critic_score', 'user_score']
ps4_games = data_game.loc[data_game['platform'] == "PS4", ps4_columns]
sns.pairplot(ps4_games)
plt.show()

/home/jupyter-zurab/.local/lib/python3.9/site-packages/seaborn/distributions.py:499: FutureWarning:

In a future version, the Index constructor will not infer numeric dtypes when passed object-dtype sequences (matching Series behavior)

/home/jupyter-zurab/.local/lib/python3.9/site-packages/seaborn/distributions.py:500: FutureWarning:

In a future version, the Index constructor will not infer numeric dtypes when passed object-dtype sequences (matching Series behavior)

Размер выборки Xbox One: 165
Размер выборки PC: 123

p-значение: 0.139237120435483
Не получилось отвергнуть нулевую гипотезу

Размер выборки Action: 298
Размер выборки Sports: 127

p-значение: 9.658176026869598e-15
Отвергаем нулевую гипотезу


# PS4 is chosen as the most popular and successful platform.
ps4_columns = ['total_sales', 'critic_score', 'user_score']
ps4_games = data_game.loc[data_game['platform'] == "PS4", ps4_columns]
sns.pairplot(ps4_games)
plt.show()

/home/jupyter-zurab/.local/lib/python3.9/site-packages/seaborn/distributions.py:499: FutureWarning:

In a future version, the Index constructor will not infer numeric dtypes when passed object-dtype sequences (matching Series behavior)

/home/jupyter-zurab/.local/lib/python3.9/site-packages/seaborn/distributions.py:500: FutureWarning:

In a future version, the Index constructor will not infer numeric dtypes when passed object-dtype sequences (matching Series behavior)


# Spearman (rank) correlation between sales and scores for PS4 games.
# NOTE(review): the original intent was to "discard" the zero placeholders, but
# no rows are filtered here, so zero scores still take part in the ranking -
# confirm whether they should be excluded first.
ps4_columns = ['total_sales', 'critic_score', 'user_score']
ps4_games = data_game.loc[data_game['platform'] == "PS4", ps4_columns]
ps4_games.corr(method='spearman')

Размер выборки Xbox One: 165
Размер выборки PC: 123

p-значение: 0.139237120435483
Не получилось отвергнуть нулевую гипотезу

Размер выборки Action: 298
Размер выборки Sports: 127

p-значение: 9.658176026869598e-15
Отвергаем нулевую гипотезу


# Spearman (rank) correlation between sales and scores for PS4 games.
# NOTE(review): the original intent was to "discard" the zero placeholders, but
# no rows are filtered here, so zero scores still take part in the ranking -
# confirm whether they should be excluded first.
ps4_columns = ['total_sales', 'critic_score', 'user_score']
ps4_games = data_game.loc[data_game['platform'] == "PS4", ps4_columns]
ps4_games.corr(method='spearman')


other_platforms = ['PS4','XOne','PC','WiiU','3DS', 'PSV']

rows = 3
cols = 2
fig, axes = plt.subplots(rows, cols, figsize=(15,10))
sns.set(font_scale=1.0)

# Pair every subplot with one platform. zip stops at the shorter sequence, so a
# mismatch between the grid size and the platform list cannot raise IndexError.
for ax, index in zip(axes.flat, other_platforms):
    # Keep only this platform's games with a positive user score.
    df = data_game[(data_game['platform'] == index) & (data_game['user_score'] > 0)]

    df_filtered = df[['total_sales' ,'critic_score', 'user_score']]
    # Single-column heatmap: correlation of every column with total_sales.
    sns.heatmap(df_filtered.corr()[['total_sales']].sort_values(by='total_sales', ascending=False),
                cmap="Blues", annot=True, annot_kws={'size':15}, ax=ax)
    ax.set_title(index, fontsize=20)
    ax.set_yticklabels(ax.get_yticklabels(), rotation=0)

# Lay out the grid once, after all panels are drawn.
plt.tight_layout(pad=5)

Размер выборки Xbox One: 165
Размер выборки PC: 123

p-значение: 0.139237120435483
Не получилось отвергнуть нулевую гипотезу

Размер выборки Action: 298
Размер выборки Sports: 127

p-значение: 9.658176026869598e-15
Отвергаем нулевую гипотезу


other_platforms = ['PS4','XOne','PC','WiiU','3DS', 'PSV']

rows = 3
cols = 2
fig, axes = plt.subplots(rows, cols, figsize=(15,10))
sns.set(font_scale=1.0)

# Pair every subplot with one platform. zip stops at the shorter sequence, so a
# mismatch between the grid size and the platform list cannot raise IndexError.
for ax, index in zip(axes.flat, other_platforms):
    # Keep only this platform's games with a positive user score.
    df = data_game[(data_game['platform'] == index) & (data_game['user_score'] > 0)]

    df_filtered = df[['total_sales' ,'critic_score', 'user_score']]
    # Single-column heatmap: correlation of every column with total_sales.
    sns.heatmap(df_filtered.corr()[['total_sales']].sort_values(by='total_sales', ascending=False),
                cmap="Blues", annot=True, annot_kws={'size':15}, ax=ax)
    ax.set_title(index, fontsize=20)
    ax.set_yticklabels(ax.get_yticklabels(), rotation=0)

# Lay out the grid once, after all panels are drawn.
plt.tight_layout(pad=5)


ax = plt.gca()

# Per genre since 2013: number of titles and summed sales, largest count first.
genre_stats = data_2013.groupby('genre').agg({'name': 'count', 'total_sales': 'sum'})
ratio_genres = genre_stats.sort_values(by='name', ascending=False)
games = ratio_genres['name']
sales = ratio_genres['total_sales']

# Draw both series as adjacent bars on the same axes.
games.plot(kind='bar', figsize=(15,5), ec='black', ax=ax, width=0.2, position=1)
sales.plot(kind='bar', figsize=(15,5), ec='black', ax=ax, width=0.2, color='#97F0AA', position=0)
ax.legend(['Количество продаж', 'Общая сумма продаж'])

# Sales per title in each genre, shown as a table sorted by that ratio.
ratio_genres['ratio'] = ratio_genres['total_sales'] / ratio_genres['name']
brown_cmap = sns.light_palette("brown", as_cmap=True)
ratio_genres.sort_values(by='ratio', ascending=False).reset_index().style.background_gradient(brown_cmap)

Размер выборки Xbox One: 165
Размер выборки PC: 123

p-значение: 0.139237120435483
Не получилось отвергнуть нулевую гипотезу

Размер выборки Action: 298
Размер выборки Sports: 127

p-значение: 9.658176026869598e-15
Отвергаем нулевую гипотезу


ax = plt.gca()

# Per genre since 2013: number of titles and summed sales, largest count first.
genre_stats = data_2013.groupby('genre').agg({'name': 'count', 'total_sales': 'sum'})
ratio_genres = genre_stats.sort_values(by='name', ascending=False)
games = ratio_genres['name']
sales = ratio_genres['total_sales']

# Draw both series as adjacent bars on the same axes.
games.plot(kind='bar', figsize=(15,5), ec='black', ax=ax, width=0.2, position=1)
sales.plot(kind='bar', figsize=(15,5), ec='black', ax=ax, width=0.2, color='#97F0AA', position=0)
ax.legend(['Количество продаж', 'Общая сумма продаж'])

# Sales per title in each genre, shown as a table sorted by that ratio.
ratio_genres['ratio'] = ratio_genres['total_sales'] / ratio_genres['name']
brown_cmap = sns.light_palette("brown", as_cmap=True)
ratio_genres.sort_values(by='ratio', ascending=False).reset_index().style.background_gradient(brown_cmap)


# Box plot of sales per genre on the six chosen platforms: keep one
# total_sales value per (name, platform, genre).
six_platform_games = data_2013.query('platform in ("PS4", "XOne", "PC", "WiiU", "3DS", "PSV")')
data_box = six_platform_games.pivot_table(index=['name', 'platform', 'genre'], values='total_sales')
data_box = data_box.sort_values(by='platform').reset_index()

# One box trace per genre.
data = [
    go.Box(y=data_box.loc[data_box.genre == genre_name, 'total_sales'], name=genre_name)
    for genre_name in data_box.genre.unique()
]

layout = {'title': 'Продажи по жанрам'}
fig = go.Figure(data=data, layout=layout)

# Restrict the y axis to [0, 2] so the boxes are readable.
fig.update_layout(yaxis=dict(range=[0,2]))
fig.layout.template = 'plotly_white'

iplot(fig, show_link = False)


# Top-5 platforms by sales in each market, for games released after 2012.
recent_games = data_game[data_game.year > 2012]
sales_by_platform = recent_games.groupby('platform')
na = sales_by_platform['na'].agg(na='sum').nlargest(5, 'na').reset_index()
eu = sales_by_platform['eu'].agg(eu='sum').nlargest(5, 'eu').reset_index()
jp = sales_by_platform['jp'].agg(jp='sum').nlargest(5, 'jp').reset_index()

Размер выборки Xbox One: 165
Размер выборки PC: 123

p-значение: 0.139237120435483
Не получилось отвергнуть нулевую гипотезу

Размер выборки Action: 298
Размер выборки Sports: 127

p-значение: 9.658176026869598e-15
Отвергаем нулевую гипотезу


# Top-5 platforms by sales in each market, for games released after 2012.
recent_games = data_game[data_game.year > 2012]
sales_by_platform = recent_games.groupby('platform')
na = sales_by_platform['na'].agg(na='sum').nlargest(5, 'na').reset_index()
eu = sales_by_platform['eu'].agg(eu='sum').nlargest(5, 'eu').reset_index()
jp = sales_by_platform['jp'].agg(jp='sum').nlargest(5, 'jp').reset_index()

Размер выборки Xbox One: 165
Размер выборки PC: 123

p-значение: 0.139237120435483
Не получилось отвергнуть нулевую гипотезу

Размер выборки Action: 298
Размер выборки Sports: 127

p-значение: 9.658176026869598e-15
Отвергаем нулевую гипотезу


# Top-5 platforms by sales in each market, for games released after 2012.
recent_games = data_game[data_game.year > 2012]
sales_by_platform = recent_games.groupby('platform')
na = sales_by_platform['na'].agg(na='sum').nlargest(5, 'na').reset_index()
eu = sales_by_platform['eu'].agg(eu='sum').nlargest(5, 'eu').reset_index()
jp = sales_by_platform['jp'].agg(jp='sum').nlargest(5, 'jp').reset_index()


# Bar charts of platform sales, one panel per market.
fig, ax = plt.subplots(1, 3, figsize=(15, 5))
fig.suptitle('Продажи по платформам в разрезе рынков')

market_panels = [
    (ax[0], na, 'na', 'Северная Америка'),
    (ax[1], eu, 'eu', 'Европа'),
    (ax[2], jp, 'jp', 'Япония'),
]
for panel, market_top, sales_column, market_title in market_panels:
    sns.barplot(y=sales_column, x='platform', data=market_top, ax=panel, palette='pastel')
    panel.set_title(market_title)
    panel.set_ylabel('продажи, млн.шт.')
    panel.set_xlabel('платформа')
plt.show()


# Top-5 genres by sales in each market, for games released after 2012.
recent_games = data_game[data_game.year > 2012]
sales_by_genre = recent_games.groupby('genre')
na_genres = sales_by_genre['na'].agg(na='sum').nlargest(5, 'na').reset_index()
eu_genres = sales_by_genre['eu'].agg(eu='sum').nlargest(5, 'eu').reset_index()
jp_genres = sales_by_genre['jp'].agg(jp='sum').nlargest(5, 'jp').reset_index()

Размер выборки Xbox One: 165
Размер выборки PC: 123

p-значение: 0.139237120435483
Не получилось отвергнуть нулевую гипотезу

Размер выборки Action: 298
Размер выборки Sports: 127

p-значение: 9.658176026869598e-15
Отвергаем нулевую гипотезу


# Top-5 genres by sales in each market, for games released after 2012.
recent_games = data_game[data_game.year > 2012]
sales_by_genre = recent_games.groupby('genre')
na_genres = sales_by_genre['na'].agg(na='sum').nlargest(5, 'na').reset_index()
eu_genres = sales_by_genre['eu'].agg(eu='sum').nlargest(5, 'eu').reset_index()
jp_genres = sales_by_genre['jp'].agg(jp='sum').nlargest(5, 'jp').reset_index()


# Bar charts of genre sales, one panel per market.
fig, ax = plt.subplots(1, 3, figsize=(20, 5))
fig.suptitle('Продажи по жанрам в разрезе рынков')

genre_panels = [
    (ax[0], na_genres, 'na', 'Северная Америка'),
    (ax[1], eu_genres, 'eu', 'Европа'),
    (ax[2], jp_genres, 'jp', 'Япония'),
]
for panel, market_top, sales_column, market_title in genre_panels:
    sns.barplot(y=sales_column, x='genre', data=market_top, ax=panel, palette='pastel')
    panel.set_title(market_title)
    panel.set_ylabel('продажи, млн.шт.')
    panel.set_xlabel('жанр')
plt.show()


# Summed sales per rating category in each market.
# NOTE(review): this filter keeps year > 2013, while the platform and genre
# breakdowns in this file use year > 2012 - confirm the period is intended.
games_after_2013 = data_game[data_game.year > 2013]
sales_by_rating = games_after_2013.groupby('rating')
na_rating = sales_by_rating['na'].agg(na_sales='sum').reset_index()
eu_rating = sales_by_rating['eu'].agg(eu_sales='sum').reset_index()
jp_rating = sales_by_rating['jp'].agg(jp_sales='sum').reset_index()

Размер выборки Xbox One: 165
Размер выборки PC: 123

p-значение: 0.139237120435483
Не получилось отвергнуть нулевую гипотезу

Размер выборки Action: 298
Размер выборки Sports: 127

p-значение: 9.658176026869598e-15
Отвергаем нулевую гипотезу


# Summed sales per rating category in each market.
# NOTE(review): this filter keeps year > 2013, while the platform and genre
# breakdowns in this file use year > 2012 - confirm the period is intended.
games_after_2013 = data_game[data_game.year > 2013]
sales_by_rating = games_after_2013.groupby('rating')
na_rating = sales_by_rating['na'].agg(na_sales='sum').reset_index()
eu_rating = sales_by_rating['eu'].agg(eu_sales='sum').reset_index()
jp_rating = sales_by_rating['jp'].agg(jp_sales='sum').reset_index()


# Three side-by-side panels, one pie chart per market.
fig, ax = plt.subplots(1,3, figsize=(20, 6))
fig.suptitle('Продажи по рейтингам ESRB в разрезе рынков')

# Slice values and legend labels for each market.
data1 = na_rating['na_sales']
categories1 = na_rating['rating']
data2 = eu_rating['eu_sales']
categories2 = eu_rating['rating']
data3 = jp_rating['jp_sales']
categories3 = jp_rating['rating']

# Slice colours shared by all three pies.
colors = ['#006699','#009933','#FF9900','#996600', '#CC0000']

def func(pct, allvals):
    """Build the label shown inside a pie wedge: share and absolute volume.

    Args:
        pct: Wedge share in percent, as passed to an ``autopct`` callable.
        allvals: All values plotted in the pie; their sum is the 100% total.

    Returns:
        Text such as ``"25.0% (12 млн )"``, where the number in parentheses
        is the wedge's absolute value rounded to the nearest integer.
    """
    # Round instead of truncating: a bare int() drops the fractional part, so
    # 1.6 would be shown as 1 and float error (e.g. 9.999...) would lose a unit.
    absolute = int(np.round(pct / 100. * np.sum(allvals)))
    return "{:.1f}% ({:d} млн )".format(pct, absolute)

# One pie per market: (axes, values, legend labels, title, extra pie() options).
pie_panels = (
    (ax[0], data1, categories1, "Северная Америка", {}),
    (ax[1], data2, categories2, "Европа", {}),
    (ax[2], data3, categories3, "Япония", {"startangle": 65}),
)
for region_ax, region_sales, region_ratings, region_title, pie_kwargs in pie_panels:
    wedges, texts, autotexts = region_ax.pie(
        region_sales,
        # bind the current values so the label is computed against this pie's total
        autopct=lambda pct, sales=region_sales: func(pct, sales),
        textprops=dict(color="w"),
        colors=colors,
        **pie_kwargs,
    )
    region_ax.set_title(region_title)
    region_ax.legend(wedges, region_ratings, title="рейтинг", loc="upper right")
    plt.setp(autotexts, size=8, weight=850)

plt.show()


# User scores of Xbox One and PC games released after 2013.
# The query also drops rows whose user_score equals 0.
# NOTE(review): presumably 0 is a placeholder for a missing score — confirm.
games_after_2013 = data_game[data_game.year > 2013]
sample_xone = games_after_2013.query('platform == "XOne" & user_score != 0')['user_score']
sample_pc = games_after_2013.query('platform == "PC" & user_score != 0')['user_score']

print('Размер выборки Xbox One:', len(sample_xone))
print('Размер выборки PC:', len(sample_pc))

Размер выборки Xbox One: 165
Размер выборки PC: 123

p-значение: 0.139237120435483
Не получилось отвергнуть нулевую гипотезу

Размер выборки Action: 298
Размер выборки Sports: 127

p-значение: 9.658176026869598e-15
Отвергаем нулевую гипотезу


# User scores of Xbox One and PC games released after 2013.
# The query also drops rows whose user_score equals 0.
# NOTE(review): presumably 0 is a placeholder for a missing score — confirm.
games_after_2013 = data_game[data_game.year > 2013]
sample_xone = games_after_2013.query('platform == "XOne" & user_score != 0')['user_score']
sample_pc = games_after_2013.query('platform == "PC" & user_score != 0')['user_score']

print('Размер выборки Xbox One:', len(sample_xone))
print('Размер выборки PC:', len(sample_pc))

Размер выборки Xbox One: 165
Размер выборки PC: 123

p-значение: 0.139237120435483
Не получилось отвергнуть нулевую гипотезу

Размер выборки Action: 298
Размер выборки Sports: 127

p-значение: 9.658176026869598e-15
Отвергаем нулевую гипотезу


# User scores of Xbox One and PC games released after 2013.
# The query also drops rows whose user_score equals 0.
# NOTE(review): presumably 0 is a placeholder for a missing score — confirm.
games_after_2013 = data_game[data_game.year > 2013]
sample_xone = games_after_2013.query('platform == "XOne" & user_score != 0')['user_score']
sample_pc = games_after_2013.query('platform == "PC" & user_score != 0')['user_score']

print('Размер выборки Xbox One:', len(sample_xone))
print('Размер выборки PC:', len(sample_pc))

Размер выборки Xbox One: 165
Размер выборки PC: 123

p-значение: 0.139237120435483
Не получилось отвергнуть нулевую гипотезу

Размер выборки Action: 298
Размер выборки Sports: 127

p-значение: 9.658176026869598e-15
Отвергаем нулевую гипотезу


# User scores of Xbox One and PC games released after 2013.
# The query also drops rows whose user_score equals 0.
# NOTE(review): presumably 0 is a placeholder for a missing score — confirm.
games_after_2013 = data_game[data_game.year > 2013]
sample_xone = games_after_2013.query('platform == "XOne" & user_score != 0')['user_score']
sample_pc = games_after_2013.query('platform == "PC" & user_score != 0')['user_score']

print('Размер выборки Xbox One:', len(sample_xone))
print('Размер выборки PC:', len(sample_pc))

Размер выборки Xbox One: 165
Размер выборки PC: 123


# Top row: histogram of user scores per platform; bottom row: Q-Q plot of the same sample.
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(15, 10))
fig.suptitle('Распределение значений оценки пользователей')

platform_samples = (('XOne', sample_xone), ('PC', sample_pc))
for col, (platform_name, scores) in enumerate(platform_samples):
    hist_ax = axes[0, col]
    sns.histplot(scores, ax=hist_ax, kde=False, bins=25)
    hist_ax.set_title(platform_name)
    hist_ax.set_xlabel('Оценка')
    hist_ax.set_ylabel('Частота')

    # line='s' draws the standardized reference line: the expected order
    # statistics are scaled by the sample's standard deviation and shifted
    # by its mean.
    qqplot(scores, line='s', ax=axes[1, col])

plt.show()


# Significance level: the null hypothesis is rejected when the p-value falls below it.
alpha = 0.05

# Two-sample t-test on the XOne and PC user scores; equal_var=False means the
# two samples are not assumed to share a variance (Welch's t-test).
results_1 = st.ttest_ind(sample_xone, sample_pc, equal_var=False)

print('p-значение:', results_1.pvalue)
print(
    "Отвергаем нулевую гипотезу"
    if results_1.pvalue < alpha
    else "Не получилось отвергнуть нулевую гипотезу"
)

p-значение: 0.139237120435483
Не получилось отвергнуть нулевую гипотезу


# User scores of Action and Sports games released after 2013.
# The query also drops rows whose user_score equals 0.
# NOTE(review): presumably 0 is a placeholder for a missing score — confirm.
games_after_2013 = data_game[data_game.year > 2013]
sample_action = games_after_2013.query('genre == "Action" & user_score != 0')['user_score']
sample_sports = games_after_2013.query('genre == "Sports" & user_score != 0')['user_score']

print('Размер выборки Action:', len(sample_action))
print('Размер выборки Sports:', len(sample_sports))

Размер выборки Action: 298
Размер выборки Sports: 127

p-значение: 9.658176026869598e-15
Отвергаем нулевую гипотезу


# User scores of Action and Sports games released after 2013.
# The query also drops rows whose user_score equals 0.
# NOTE(review): presumably 0 is a placeholder for a missing score — confirm.
games_after_2013 = data_game[data_game.year > 2013]
sample_action = games_after_2013.query('genre == "Action" & user_score != 0')['user_score']
sample_sports = games_after_2013.query('genre == "Sports" & user_score != 0')['user_score']

print('Размер выборки Action:', len(sample_action))
print('Размер выборки Sports:', len(sample_sports))

Размер выборки Action: 298
Размер выборки Sports: 127

p-значение: 9.658176026869598e-15
Отвергаем нулевую гипотезу


# User scores of Action and Sports games released after 2013.
# The query also drops rows whose user_score equals 0.
# NOTE(review): presumably 0 is a placeholder for a missing score — confirm.
games_after_2013 = data_game[data_game.year > 2013]
sample_action = games_after_2013.query('genre == "Action" & user_score != 0')['user_score']
sample_sports = games_after_2013.query('genre == "Sports" & user_score != 0')['user_score']

print('Размер выборки Action:', len(sample_action))
print('Размер выборки Sports:', len(sample_sports))

Размер выборки Action: 298
Размер выборки Sports: 127


# Top row: histogram of user scores per genre; bottom row: Q-Q plot of the same sample.
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(15, 10))
fig.suptitle('Распределение значений оценки пользователей')

genre_samples = (('Action', sample_action), ('Sports', sample_sports))
for col, (genre_name, scores) in enumerate(genre_samples):
    hist_ax = axes[0, col]
    sns.histplot(scores, ax=hist_ax, kde=False, bins=25)
    hist_ax.set_title(genre_name)
    hist_ax.set_xlabel('Оценка')
    hist_ax.set_ylabel('Частота')

    # line='s' draws the standardized reference line: the expected order
    # statistics are scaled by the sample's standard deviation and shifted
    # by its mean.
    qqplot(scores, line='s', ax=axes[1, col])

plt.show()


# Significance level: the null hypothesis is rejected when the p-value falls below it.
alpha = 0.05

# Two-sample t-test on the Action and Sports user scores; equal_var=False means
# the two samples are not assumed to share a variance (Welch's t-test).
results_2 = st.ttest_ind(sample_action, sample_sports, equal_var=False)

print('p-значение:', results_2.pvalue)
print(
    "Отвергаем нулевую гипотезу"
    if results_2.pvalue < alpha
    else "Не получилось отвергнуть нулевую гипотезу"
)

p-значение: 9.658176026869598e-15
Отвергаем нулевую гипотезу

	Name	Platform	Year_of_Release	Genre	NA_sales	EU_sales	JP_sales	Other_sales	Critic_Score	User_Score	Rating
0	Wii Sports	Wii	2006.0	Sports	41.36	28.96	3.77	8.45	76.0	8	E
1	Super Mario Bros.	NES	1985.0	Platform	29.08	3.58	6.81	0.77	NaN	NaN	NaN
2	Mario Kart Wii	Wii	2008.0	Racing	15.68	12.76	3.79	3.29	82.0	8.3	E
3	Wii Sports Resort	Wii	2009.0	Sports	15.61	10.93	3.28	2.95	80.0	8	E
4	Pokemon Red/Pokemon Blue	GB	1996.0	Role-Playing	11.27	8.89	10.22	1.00	NaN	NaN	NaN

	Name	Platform	Year_of_Release	Genre	NA_sales	EU_sales	JP_sales	Critic_Score	User_Score	Rating
16710	Samurai Warriors: Sanada Maru	PS3	2016.0	Action	0.00	0.00	0.01	NaN	NaN	NaN
16711	LMA Manager 2007	X360	2006.0	Sports	0.00	0.01	0.00	NaN	NaN	NaN
16712	Haitaka no Psychedelica	PSV	2016.0	Adventure	0.00	0.00	0.01	NaN	NaN	NaN
16713	Spirits & Spells	GBA	2003.0	Platform	0.01	0.00	0.00	NaN	NaN	NaN
16714	Winning Post 8 2016	PSV	2016.0	Simulation	0.00	0.00	0.01	NaN	NaN	NaN

	Year_of_Release	NA_sales	EU_sales	JP_sales	Other_sales	Critic_Score
count	16446.000000	16715.000000	16715.000000	16715.000000	16715.000000	8137.000000
mean	2006.484616	0.263377	0.145060	0.077617	0.047342	68.967679
std	5.877050	0.813604	0.503339	0.308853	0.186731	13.938165
min	1980.000000	0.000000	0.000000	0.000000	0.000000	13.000000
25%	2003.000000	0.000000	0.000000	0.000000	0.000000	60.000000
50%	2007.000000	0.080000	0.020000	0.000000	0.010000	71.000000
75%	2010.000000	0.240000	0.110000	0.040000	0.030000	79.000000
max	2016.000000	41.360000	28.960000	10.220000	10.570000	98.000000

	name	platform	year	genre	na	eu	jp	other	critic_score	user_score	rating
659	NaN	GEN	1993	NaN	1.78	0.53	0.00	0.08	<NA>	NaN	NaN
14244	NaN	GEN	1993	NaN	0.00	0.00	0.03	0.00	<NA>	NaN	NaN

	name	platform	year	genre	na	eu	jp	other	critic_score	user_score	rating
183	Madden NFL 2004	PS2		Sports	4.260000	0.260000	0.010000	0.710000	94	8.500000	E
1881	Madden NFL 2004	XB	2003	Sports	1.020000	0.020000	0.000000	0.050000	92	8.300000	E
3889	Madden NFL 2004	GC	2003	Sports	0.400000	0.100000	0.000000	0.010000	94	7.700000	E
5708	Madden NFL 2004	GBA	2003	Sports	0.220000	0.080000	0.000000	0.010000	70	6.600000	E

Описание проекта

Подготовка данных

Исследовательский анализ данных

Составьте портрет пользователя каждого региона

Проверьте гипотезы

Общий вывод

	Name	Platform	Genre	User_Score	Rating
count	16713	16715	16713	10014	9949
unique	11559	31	12	96	8
top	Need for Speed: Most Wanted	PS2	Action	tbd	E
freq	12	2161	3369	2424	3990

	index	name
0	183	Madden NFL 2004
1	377	FIFA Soccer 2004
2	456	LEGO Batman: The Videogame

	name	platform	year	genre	na	eu	jp	other	critic_score	user_score	rating
377	FIFA Soccer 2004	PS2		Sports	0.590000	2.360000	0.040000	0.510000	84	6.400000	E
2606	FIFA Soccer 2004	XB	2003	Sports	0.240000	0.490000	0.000000	0.050000	82	8.200000	E
12029	FIFA Soccer 2004	GC	2003	Sports	0.050000	0.010000	0.000000	0.000000	83	6.200000	E
13086	FIFA Soccer 2004	GBA	2003	Sports	0.040000	0.010000	0.000000	0.000000	82	7.900000	E

	name	platform	year	genre	na	eu	other	critic_score	user_score	rating
397	LEGO Batman: The Videogame	X360	2008	Action	2.040000	1.020000	0.320000	76	7.900000	E10+
456	LEGO Batman: The Videogame	Wii		Action	1.800000	0.970000	0.290000	74	7.900000	E10+
460	LEGO Batman: The Videogame	DS	2008	Action	1.750000	1.010000	0.290000	72	8.000000	E10+
1519	LEGO Batman: The Videogame	PS3	2008	Action	0.720000	0.390000	0.190000	75	7.700000	E10+
1538	LEGO Batman: The Videogame	PSP		Action	0.570000	0.440000	0.270000	73	7.400000	E10+
1553	LEGO Batman: The Videogame	PS2	2008	Action	0.720000	0.030000	0.520000	77	8.900000	E10+
12465	LEGO Batman: The Videogame	PC	2008	Action	0.020000	0.030000	0.010000	80	7.800000	E10+

	name	platform	year	genre	na	eu	other	critic_score	user_score	rating
627	Rock Band	X360	<NA>	Misc	1.93	0.33	0.21	92	8.2	T
805	Rock Band	Wii	<NA>	Misc	1.33	0.56	0.20	80	6.3	T
1142	Rock Band	PS3	<NA>	Misc	0.99	0.41	0.22	92	8.4	T
1840	Rock Band	PS2	<NA>	Misc	0.71	0.06	0.35	82	6.8	T

	platform	total_sales
0	PS2	1247.160000
1	X360	966.610000
2	PS3	935.190000
3	Wii	903.310000
4	DS	804.280000
5	PS	727.580000
6	PS4	314.140000
7	GBA	313.730000
8	PSP	293.570000
9	PC	258.860000
10	3DS	258.530000
11	XB	256.690000
12	GB	254.430000
13	NES	251.050000
14	N64	218.480000

	name	platform	year	total_sales
0	Beyblade Burst	3DS	2016	0.03
1	Rodea the Sky Soldier	3DS	2015	0.03
2	Romance of the Three Kingdoms (3DS)	3DS	2013	0.03
3	Romance of the Three Kingdoms II	3DS	2015	0.03
4	Dream Girl Premier	3DS	2015	0.03
5	Dragon Quest X	3DS	2014	0.17
6	Dragon Quest VIII: Journey of the Cursed King	3DS	2015	0.86
7	Dragon Quest VII: Warriors of Eden	3DS	2013	1.46
8	Dragon Quest Monsters Joker 3	3DS	2016	0.63
9	Dragon Quest Monsters 2	3DS	2014	0.79

	total_sales	critic_score	user_score
total_sales	1.000000	0.414235	0.175916
critic_score	0.414235	1.000000	0.797199
user_score	0.175916	0.797199	1.000000

	genre	name	total_sales	ratio
0	Shooter	187	232.980000	1.245882
1	Sports	214	150.650000	0.703972
2	Platform	74	42.630000	0.576081
3	Role-Playing	292	145.890000	0.499623
4	Racing	85	39.890000	0.469294
5	Fighting	80	35.310000	0.441375
6	Action	769	322.500000	0.419376
7	Misc	156	63.060000	0.404231
8	Simulation	62	21.760000	0.350968
9	Puzzle	17	3.170000	0.186471
10	Strategy	56	10.080000	0.180000
11	Adventure	245	23.640000	0.096490

	index	name
0	475	wwe Smackdown vs. Raw 2006
1	627	Rock Band